suppressPackageStartupMessages(library(tidyverse))
## Warning: package 'ggplot2' was built under R version 4.2.3
## Warning: package 'tidyr' was built under R version 4.2.3
## Warning: package 'readr' was built under R version 4.2.3
## Warning: package 'dplyr' was built under R version 4.2.3
## Warning: package 'stringr' was built under R version 4.2.3
devtools::load_all('~/Google Drive/My Drive/Scripts/R_packages/myUtilities/')
## ℹ Loading myUtilities
library(patchwork)
## Warning: package 'patchwork' was built under R version 4.2.3

Settings

# Root locations: raw data volume and the analysis project directory.
data_dir <- "/Volumes/Mitsu_NGS_3/METTL2A/"

wd <- "~/Google Drive/My Drive/Analysis/METTL2A/"
setwd(wd)

# Output directories; both are already prefixed with `wd`.
figdir <- paste0(wd, "Figures/DRS_m3C_RNAs/Isoforms/Mapping_m3Csites_isoforms/")
tabledir <- paste0(wd, "Tables/DRS_m3C_RNAs/Isoforms/")

# Session-wide default ggplot theme: small classic theme, legend at the bottom.
theme_set(theme_classic(base_size = 7) + theme(legend.position = "bottom"))

# Theme for the stacked track panels: no legend, no axis titles, and no
# x-axis text, ticks or line.
theme_mapping <-
  theme_classic(base_size = 7) +
  theme(
    axis.title = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.line.x = element_blank(),
    legend.position = "none"
  )

Functions

#' Prefix a project-relative path with the analysis root.
#'
#' @param path Character vector of paths relative to the global `wd`.
#' @return Character vector: `wd` and `path` concatenated with no separator
#'   inserted between them.
paste_wd <- function(path) paste0(wd, path)

#' Read a headerless four-column bedGraph file.
#'
#' Column types are declared explicitly rather than guessed, so the sequence
#' name is always character (even for purely numeric chromosome names) and
#' readr does not print a column-specification message on every call.
#'
#' @param path Path to a tab-separated file with columns sequence name, start,
#'   end and value, in that order and without a header line.
#' @return A tibble with columns `seq_name` (character), `start`, `end` and
#'   `value` (all double).
read_bedgz <- function(path) {
  read_tsv(
    path,
    col_names = c("seq_name", "start", "end", "value"),
    col_types = cols(
      seq_name = col_character(),
      start = col_double(),
      end = col_double(),
      value = col_double()
    )
  )
}

#' Expand BED12 records into one row per exon (block).
#'
#' Exon start is the record start plus the block offset; exon end is the exon
#' start plus the block size. Both are computed explicitly, rather than relying
#' on a freshly created `start` shadowing the transcript start.
#'
#' @param bed12_df Data frame with columns `chrom`, `start`, `name`, `score`,
#'   `strand`, and comma-separated character columns `blockSizes` and
#'   `blockStarts`.
#' @return A tibble with columns `chrom`, `start`, `end`, `name`, `score`,
#'   `strand`, one row per block. A record whose block fields are `NA` yields a
#'   single row with `NA` coordinates.
bed12_to_exon <- function(bed12_df) {
  # Split a comma-separated block field into one integer vector per record.
  # A single trailing comma is stripped first so it does not turn into a
  # spurious NA block.
  split_blocks <- function(field) {
    field |>
      str_remove(",$") |>
      str_split(",") |>
      map(as.integer)
  }

  bed12_df |>
    mutate(
      exon_start = map2(
        start, split_blocks(blockStarts),
        \(tx_start, offsets) tx_start + offsets
      ),
      exon_end = map2(
        exon_start, split_blocks(blockSizes),
        \(starts, sizes) starts + sizes
      )
    ) |>
    transmute(chrom, start = exon_start, end = exon_end, name, score, strand) |>
    unnest(cols = c(start, end))
}

#' Exon coordinates for every transcript of one gene.
#'
#' Reads the globals `transcript2gene` and `transcripts_annotation`. The join
#' key is stated explicitly so the result cannot change silently if the two
#' tables ever gain another column name in common.
#'
#' @param genename Gene name matched against `transcript2gene$gene_name`.
#' @return Output of `bed12_to_exon()`: one row per exon, with `name` set to
#'   "<transcript_id>|<transcript_name>".
calc_exonpos_onegene <- function(genename) {
  transcript2gene |>
    filter(gene_name == genename) |>
    left_join(transcripts_annotation, by = "transcript_id") |>
    mutate(name = paste(transcript_id, transcript_name, sep = "|")) |>
    bed12_to_exon()
}

#' Read one bedGraph file and keep only the signal over one gene.
#'
#' The gene window runs from (smallest transcript start - 1) to the largest
#' transcript end, taken over all annotated transcripts of `genename` in the
#' globals `transcript2gene` / `transcripts_annotation`.
#'
#' @param bedgz Path to a bedGraph file readable by `read_bedgz()`.
#' @param genename Gene name matched against `transcript2gene$gene_name`.
#' @return A tibble with columns `seq_name`, `start`, `end`, `value`, plus
#'   `type`, `si`, `rep`, `strand` parsed from the file name by splitting on
#'   "_" (missing trailing pieces are `NA`).
read_bedgz_onegene <- function(bedgz, genename) {
  gene_transcripts <-
    transcript2gene |>
    filter(gene_name == genename) |>
    left_join(transcripts_annotation, by = "transcript_id")

  minstart <- min(gene_transcripts$start, na.rm = TRUE) - 1
  maxend <- max(gene_transcripts$end, na.rm = TRUE)

  # `%in%` instead of `==`: safe if the gene maps to more than one sequence
  # name or some transcripts have a missing `seqname`.
  gene_seqnames <- unique(gene_transcripts$seqname)
  gene_seqnames <- gene_seqnames[!is.na(gene_seqnames)]

  read_bedgz(bedgz) |>
    filter(seq_name %in% gene_seqnames) |>
    # Keep intervals that touch the gene window ...
    filter(end >= minstart & start <= maxend) |>
    # ... but not those extending beyond both ends of it.
    filter(start >= minstart | end <= maxend) |>
    mutate(
      # Clip intervals that stick out of the window.
      start = pmax(start, minstart),
      end = pmin(end, maxend),
      # Sample label: file name without the run prefix and the extension.
      basename = basename(bedgz) |>
        str_remove_all('[0-9]+_DrTaniue_[0-9]_|.bedGraph.gz')
    ) |>
    # fill = 'right' pads file names with fewer than four pieces with NA,
    # exactly as the default does, but without a warning for every file.
    separate(
      basename,
      into = c('type', 'si', 'rep', 'strand'),
      sep = '_',
      fill = 'right'
    )
}

#' Map transcript-relative m3C site positions of one gene to genomic positions.
#'
#' Uses the globals `transcript2gene`, `DRS_m3Csites` and
#' `transcripts_annotation`. For each site, exons are walked in transcript
#' (5'->3') order until the cumulative exon length exceeds `kmer_middle`;
#' `pos_in_exon` is then the number of bases from the site to the 3' end of
#' that exon. On the '-' strand the exon's 3' end is its genomic start, so the
#' site lies at exon start + pos_in_exon. On the '+' strand the 3' end is the
#' genomic end, so the site lies at exon end - pos_in_exon + 1. The previous
#' version applied the '-' strand formula to both strands, which mirrored
#' '+' strand sites within their exon.
#' NOTE(review): this treats `kmer_middle` as a 0-based transcript position,
#' as the original '-' strand arithmetic implies -- confirm against the
#' caller's convention.
#'
#' @param genename Gene name matched against `transcript2gene$gene_name`.
#' @return A tibble, still grouped by `transcript_id`, `transcript_type`,
#'   `transcript_name` and `kmer_middle`, with columns `transcript_type`,
#'   `transcript_name`, `transcript_id`, `kmer_middle`, `genomic_pos`,
#'   `start` (= genomic_pos - 1) and `end` (= genomic_pos).
calc_m3C_sites_genomicpos <- function(genename) {
  transcript2gene |>
    filter(gene_name == genename) |>
    inner_join(DRS_m3Csites, by = "transcript_id") |>
    inner_join(transcripts_annotation, by = "transcript_id") |>
    select(
      transcript_id, transcript_type, transcript_name,
      kmer_middle, strand, start, end, blockSizes, blockStarts
    ) |>
    # One row per exon; convert the block fields to integers up front.
    separate_rows(c(blockSizes, blockStarts), sep = ",", convert = TRUE) |>
    # A trailing comma in the BED12 block lists would leave an empty piece.
    filter(!is.na(blockSizes), !is.na(blockStarts)) |>
    group_by(transcript_id, transcript_type, transcript_name, kmer_middle) |>
    mutate(
      # Exons are listed in genomic order; on the '-' strand number them in
      # reverse so that exon_num follows transcript order.
      exon_num = ifelse(strand == "+", row_number(), n() - row_number() + 1L)
    ) |>
    arrange(transcript_id, kmer_middle, exon_num) |>
    mutate(
      cum_size = cumsum(blockSizes),
      pos_in_exon = cum_size - kmer_middle
    ) |>
    # The first exon whose cumulative length exceeds the site position.
    filter(pos_in_exon > 0) |>
    filter(pos_in_exon == min(pos_in_exon)) |>
    mutate(
      exon_start = start + blockStarts,
      genomic_pos = ifelse(
        strand == "+",
        exon_start + blockSizes - pos_in_exon + 1,
        exon_start + pos_in_exon
      )
    ) |>
    select(
      transcript_type, transcript_name, transcript_id, kmer_middle, genomic_pos
    ) |>
    mutate(start = genomic_pos - 1, end = genomic_pos)
}

Read data

# Isoform annotation in BED12 form; `name` is renamed to `transcript_id`, the
# column the later joins use as key.
transcripts_annotation <-
  paste_wd('Tables/Database/Espresso_AsPC1_annotation.bed') |>
  read_bed12() |>
  rename(transcript_id = name)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
transcripts_annotation
## # A tibble: 36,687 × 12
##    chrom   start     end transcript_id  score strand thickStart thickEnd itemRgb
##    <chr>   <dbl>   <dbl> <chr>          <dbl> <chr>       <dbl>    <dbl> <chr>  
##  1 chrX  2913613 2929275 ENST000002178…     0 -              NA       NA 255,0,0
##  2 chrX  2914496 2917878 ENST000004813…     0 -              NA       NA 255,0,0
##  3 chrX  2920380 2929279 ENST000004948…     0 -              NA       NA 255,0,0
##  4 chrX  2920675 2929339 ENST000005593…     0 -              NA       NA 255,0,0
##  5 chrX  2951737 2964288 ENST000006827…     0 -              NA       NA 255,0,0
##  6 chrX  3604339 3713649 ENST000002628…     0 -              NA       NA 255,0,0
##  7 chrX  3610950 3612255 ENST000004966…     0 -              NA       NA 255,0,0
##  8 chrX  3817527 3843566 ENST000006620…     0 -              NA       NA 255,0,0
##  9 chrX  3817527 3843508 ENST000004909…     0 -              NA       NA 255,0,0
## 10 chrX  3817527 3822622 ENST000004610…     0 -              NA       NA 255,0,0
## # ℹ 36,677 more rows
## # ℹ 3 more variables: blockCount <dbl>, blockSizes <chr>, blockStarts <chr>
# Transcript-to-gene lookup: only the identifier/annotation columns of the
# DESeq2 result table are kept.
transcript2gene <-
  paste_wd('Tables/Espresso/espresso_deseq2_genetype2_isDET_2024-04-18.tsv') |>
  read_tsv() |>
  select(transcript_id:gene_name, genetype2, common_DETs, seqname)
## Rows: 36717 Columns: 29
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (11): transcript_id, transcript_type, transcript_name, gene_id, gene_typ...
## dbl (18): siMETTL2A_baseMean, siMETTL2A_log2FoldChange, siMETTL2A_lfcSE, siM...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
transcript2gene
## # A tibble: 36,717 × 9
##    transcript_id     transcript_type transcript_name gene_id gene_type gene_name
##    <chr>             <chr>           <chr>           <chr>   <chr>     <chr>    
##  1 ENST00000498442.1 retained_intron CRBN-212        ENSG00… protein_… CRBN     
##  2 ENST00000459840.5 retained_intron CRBN-205        ENSG00… protein_… CRBN     
##  3 ENST00000231948.9 protein_coding  CRBN-201        ENSG00… protein_… CRBN     
##  4 ENST00000432408.6 protein_coding  CRBN-203        ENSG00… protein_… CRBN     
##  5 ENST00000339437.… protein_coding  TRNT1-203       ENSG00… protein_… TRNT1    
##  6 ENST00000488263.5 retained_intron CRBN-209        ENSG00… protein_… CRBN     
##  7 ENST00000420393.5 protein_coding  TRNT1-207       ENSG00… protein_… TRNT1    
##  8 ENST00000698415.1 retained_intron TRNT1-230       ENSG00… protein_… TRNT1    
##  9 ENST00000450014.1 protein_coding  CRBN-204        ENSG00… protein_… CRBN     
## 10 ENST00000698416.1 retained_intron TRNT1-231       ENSG00… protein_… TRNT1    
## # ℹ 36,707 more rows
## # ℹ 3 more variables: genetype2 <chr>, common_DETs <chr>, seqname <chr>
# m3Csites_genome <- 
# #  read_bed12('Tables/DRS_m3C_sites_in_genome/m3C_sites_kmer.bed') |> 
# #  separate(name, into = c('transcript_id', 'tr_pos'), sep = '[|]')
#   read_tsv('')
#   m3Csites_genome

# Methylated positions: one row per (transcript, kmer_middle) pair.
DRS_m3Csites <-
  paste_wd(
    'Tables/DRS_m3C_sites/DRS_methylated_positions_relative_range_2024-04-22.tsv'
  ) |>
  read_tsv() |>
  select(transcript_id, kmer_middle)
## Rows: 489 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (6): transcript_id, gene_name, seqname, gene_type, ref_kmer, genetype2
## dbl (7): kmer_start, kmer_end, kmer_middle, length, rel_kmer_start, rel_kmer...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
DRS_m3Csites
## # A tibble: 489 × 2
##    transcript_id     kmer_middle
##    <chr>                   <dbl>
##  1 ENST00000429711.7         425
##  2 ENST00000647248.2         383
##  3 ENST00000647248.2         384
##  4 ENST00000389680.2          60
##  5 ENST00000389680.2          78
##  6 ENST00000389680.2          96
##  7 ENST00000389680.2         151
##  8 ENST00000389680.2         156
##  9 ENST00000389680.2         157
## 10 ENST00000389680.2         158
## # ℹ 479 more rows
# Coverage over S100A4 from every bedGraph whose name ends in a digit before
# the extension, stacked into a single tibble.
# (Alternative kept for reference: glob = '*_minus.bedGraph.gz' on the
#  relative directory 'Alignment/Minimap2/Spliced/'.)
S100A4 <-
  paste_wd('Alignment/Minimap2/Spliced/') |>
  fs::dir_ls(regexp = '[0-9].bedGraph.gz') |>
  map(\(bedgz) read_bedgz_onegene(bedgz, genename = 'S100A4')) |>
  reduce(bind_rows)
## Joining with `by = join_by(transcript_id)`
## Rows: 10567723 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): seq_name dbl (3): start, end, value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 1047 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining with `by = join_by(transcript_id)`
## Rows: 5100720 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): seq_name dbl (3): start, end, value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 814 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining with `by = join_by(transcript_id)`
## Rows: 8467096 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): seq_name dbl (3): start, end, value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 972 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining with `by = join_by(transcript_id)`
## Rows: 10230564 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): seq_name dbl (3): start, end, value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 1302 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining with `by = join_by(transcript_id)`
## Rows: 9915590 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): seq_name dbl (3): start, end, value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 1241 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining with `by = join_by(transcript_id)`
## Rows: 8838508 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): seq_name dbl (3): start, end, value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 1235 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining with `by = join_by(transcript_id)`
## Rows: 10213758 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): seq_name dbl (3): start, end, value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 1031 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining with `by = join_by(transcript_id)`
## Rows: 9147872 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): seq_name dbl (3): start, end, value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 901 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining with `by = join_by(transcript_id)`
## Rows: 10419811 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): seq_name dbl (3): start, end, value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 999 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
S100A4
## # A tibble: 9,542 × 8
##    seq_name     start       end   value type      si    rep   strand
##    <chr>        <dbl>     <dbl>   <dbl> <chr>     <chr> <chr> <chr> 
##  1 chr1     153543611 153543611   0.807 siMETTL2A I     N1    <NA>  
##  2 chr1     153543611 153543612   4.04  siMETTL2A I     N1    <NA>  
##  3 chr1     153543612 153543613  59.3   siMETTL2A I     N1    <NA>  
##  4 chr1     153543613 153543614  60.2   siMETTL2A I     N1    <NA>  
##  5 chr1     153543614 153543615  77.9   siMETTL2A I     N1    <NA>  
##  6 chr1     153543615 153543616  97.3   siMETTL2A I     N1    <NA>  
##  7 chr1     153543616 153543617 103.    siMETTL2A I     N1    <NA>  
##  8 chr1     153543617 153543618 199.    siMETTL2A I     N1    <NA>  
##  9 chr1     153543618 153543619 311.    siMETTL2A I     N1    <NA>  
## 10 chr1     153543619 153543620 320.    siMETTL2A I     N1    <NA>  
## # ℹ 9,532 more rows
# Exon table for S100A4; `tr_num` is the integer code of `name` as a factor.
S100A4_exons <-
  'S100A4' |>
  calc_exonpos_onegene() |>
  mutate(tr_num = as.numeric(factor(name)))
## Joining with `by = join_by(transcript_id)`
S100A4_exons
## # A tibble: 39 × 7
##    chrom     start       end name                         score strand tr_num
##    <chr>     <dbl>     <dbl> <chr>                        <dbl> <chr>   <dbl>
##  1 chr1  153543612 153543923 ENST00000481009.1|S100A4-206     0 -           6
##  2 chr1  153544653 153545097 ENST00000481009.1|S100A4-206     0 -           6
##  3 chr1  153543620 153543923 ENST00000368714.1|S100A4-202     0 -           2
##  4 chr1  153544653 153544809 ENST00000368714.1|S100A4-202     0 -           2
##  5 chr1  153550064 153550136 ENST00000368714.1|S100A4-202     0 -           2
##  6 chr1  153543612 153543923 ENST00000368715.5|S100A4-203     0 -           3
##  7 chr1  153544653 153544809 ENST00000368715.5|S100A4-203     0 -           3
##  8 chr1  153544957 153545063 ENST00000368715.5|S100A4-203     0 -           3
##  9 chr1  153543620 153543923 ENST00000368716.9|S100A4-204     0 -           4
## 10 chr1  153544653 153544809 ENST00000368716.9|S100A4-204     0 -           4
## # ℹ 29 more rows
# Genomic coordinates of the m3C sites detected on S100A4 transcripts.
S100A4_m3Csites <- calc_m3C_sites_genomicpos(genename = 'S100A4')
## Joining with `by = join_by(transcript_id)`
## Joining with `by = join_by(transcript_id)`
## Adding missing grouping variables: `transcript_type`, `transcript_name`
S100A4_m3Csites
## # A tibble: 6 × 7
## # Groups:   transcript_id, transcript_type, transcript_name, kmer_middle [6]
##   transcript_type transcript_name transcript_id   kmer_middle genomic_pos  start
##   <chr>           <chr>           <chr>                 <dbl>       <dbl>  <dbl>
## 1 protein_coding  S100A4-201      ENST0000035433…         102   153544806 1.54e8
## 2 protein_coding  S100A4-201      ENST0000035433…         103   153544805 1.54e8
## 3 protein_coding  S100A4-204      ENST0000036871…         110   153544753 1.54e8
## 4 protein_coding  S100A4-204      ENST0000036871…         318   153543815 1.54e8
## 5 protein_coding  S100A4-204      ENST0000036871…         406   153543727 1.54e8
## 6 protein_coding  S100A4-204      ENST0000036871…         448   153543685 1.54e8
## # ℹ 1 more variable: end <dbl>
# Shared x-axis limits for all panels: full extent of the coverage data.
xlim <- with(S100A4, c(min(start), max(end)))
xlim
## [1] 153543611 153550136
# Panel 1: coverage of replicate N1, one facet per `si` level.
p1 <-
  ggplot(
    filter(S100A4, rep == 'N1'),
    aes(xmin = start, xmax = end, ymin = 0, ymax = value, fill = si)
  ) +
  geom_rect() +
  facet_wrap(~ si, ncol = 1) +
  scale_fill_manual(values = c('#8C8C8C', '#37D9CC', '#A3A3F9')) +
  scale_x_continuous(limits = xlim) +
  theme_mapping

# Panel 2: m3C sites, one row per transcript name.
p2 <-
  ggplot(
    S100A4_m3Csites,
    aes(
      x = (start + end) / 2,
      y = transcript_name,
      width = (end - start),
      height = 1
    )
  ) +
  geom_tile() +
  scale_x_continuous(limits = xlim) +
  theme_mapping
p2

# Full span of each transcript, drawn as a thin line behind its exons.
S100A4_transcripts <-
  S100A4_exons |>
  group_by(name, strand) |>
  reframe(start = min(start), end = max(end))

# Panel 3: exon structure (thick tiles) over transcript span (thin tiles).
p3 <-
  ggplot(
    S100A4_exons,
    aes(x = (start + end) / 2, y = name, width = (end - start))
  ) +
  geom_tile(height = .8) +
  geom_tile(data = S100A4_transcripts, height = .1) +
  scale_x_continuous(limits = xlim) +
  theme_mapping

# Stack the three panels vertically and write the figure.
plot <-
  (p1 / p2 / p3) +
  plot_layout(heights = c(6, 1, 2))
ggsave('temp.pdf', plot = plot, width = 18, height = 8, units = 'cm')

Plot each gene

#' Plot coverage, m3C sites and isoform structure for one gene and save a PDF.
#'
#' Builds three vertically stacked panels on a shared x range: (1) coverage of
#' replicate N1 faceted by `si`, (2) m3C sites per transcript, (3) exon
#' structure per transcript. The exon table (first 20 rows) and the transcript
#' spans are printed, the combined plot is displayed, and the figure is written
#' to "<figdir><gene>_mapping.pdf" (18 x 8 cm).
#'
#' `figdir` already starts with `wd`; earlier code prefixed `wd` a second time,
#' which sent the files into a nested "~/..." directory under `wd`.
#'
#' @param .genename Gene name as found in `transcript2gene$gene_name`.
#' @param layout Relative heights of the three panels, passed to
#'   `plot_layout(heights = )`.
#' @return The value of `ggsave()`, invisibly.
plot_mapping_with_m3Csites_and_trinfo <- function(.genename, layout = c(6,1,2)) {

  # Coverage over the gene from every bedGraph whose name ends in a digit
  # before the extension.
  mappinginfo <-
    fs::dir_ls('Alignment/Minimap2/Spliced/' |> paste_wd(), regexp = '[0-9].bedGraph.gz') |>
    map(read_bedgz_onegene, genename = .genename) |>
    bind_rows()

  exon_position <-
    calc_exonpos_onegene(.genename) |>
    mutate(tr_num = name |> factor() |> as.numeric())
  print(exon_position |> head(20))

  m3Csites_genomicpos <-
    calc_m3C_sites_genomicpos(.genename)

  # Full span of each transcript, drawn as a thin line behind its exons.
  transcriptinfo <-
    exon_position |>
    group_by(name, strand) |>
    reframe(start = min(start), end = max(end))
  print(transcriptinfo)

  # Shared x-axis limits: full extent of the coverage data.
  xlim <- c(min(mappinginfo$start), max(mappinginfo$end))

  p1 <-
    mappinginfo |>
    filter(rep == 'N1') |>
    ggplot(aes()) +
    geom_rect(aes(xmin = start, xmax = end, ymin = 0, ymax = value, fill = si)) +
    facet_wrap( ~ si, ncol = 1) +
    scale_x_continuous(limits = xlim) +
    scale_fill_manual(values = c('#8C8C8C', '#37D9CC', '#A3A3F9')) +
    theme_mapping

  p2 <-
    m3Csites_genomicpos |>
    ggplot(aes()) +
    geom_tile(aes(
      x = (start + end) / 2,
      y = transcript_name,
      width = (end - start),
      height = 1
    )) +
    scale_x_continuous(limits = xlim) +
    theme_mapping

  p3 <-
    exon_position |>
    ggplot(aes(
      x = (start + end) / 2,
      y = name,
      width = (end - start)
    )) +
    geom_tile(height = .8) +
    geom_tile(data = transcriptinfo, height = .1) +
    scale_x_continuous(limits = xlim) +
    theme_mapping

  combined <-
    p1 / p2 / p3 +
    plot_layout(heights = layout)
  print(combined)

  # `figdir` is used as-is; it already contains the `wd` prefix.
  dir.create(figdir, showWarnings = FALSE, recursive = TRUE)
  ggsave(
    filename = paste0(figdir, .genename, '_mapping.pdf'), plot = combined,
    width = 18, height = 8, units = 'cm'
  )

}

# '-' strand example gene.
plot_mapping_with_m3Csites_and_trinfo(.genename = 'S100A4')
## Joining with `by = join_by(transcript_id)`
## Rows: 10567723 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): seq_name dbl (3): start, end, value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 1047 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining with `by = join_by(transcript_id)`
## Rows: 5100720 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): seq_name dbl (3): start, end, value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 814 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining with `by = join_by(transcript_id)`
## Rows: 8467096 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): seq_name dbl (3): start, end, value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 972 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining with `by = join_by(transcript_id)`
## Rows: 10230564 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): seq_name dbl (3): start, end, value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 1302 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining with `by = join_by(transcript_id)`
## Rows: 9915590 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): seq_name dbl (3): start, end, value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 1241 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining with `by = join_by(transcript_id)`
## Rows: 8838508 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): seq_name dbl (3): start, end, value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 1235 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining with `by = join_by(transcript_id)`
## Rows: 10213758 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): seq_name dbl (3): start, end, value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 1031 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining with `by = join_by(transcript_id)`
## Rows: 9147872 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): seq_name dbl (3): start, end, value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 901 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining with `by = join_by(transcript_id)`
## Rows: 10419811 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): seq_name dbl (3): start, end, value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 999 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining with `by = join_by(transcript_id)`
## # A tibble: 20 × 7
##    chrom     start       end name                         score strand tr_num
##    <chr>     <dbl>     <dbl> <chr>                        <dbl> <chr>   <dbl>
##  1 chr1  153543612 153543923 ENST00000481009.1|S100A4-206     0 -           6
##  2 chr1  153544653 153545097 ENST00000481009.1|S100A4-206     0 -           6
##  3 chr1  153543620 153543923 ENST00000368714.1|S100A4-202     0 -           2
##  4 chr1  153544653 153544809 ENST00000368714.1|S100A4-202     0 -           2
##  5 chr1  153550064 153550136 ENST00000368714.1|S100A4-202     0 -           2
##  6 chr1  153543612 153543923 ENST00000368715.5|S100A4-203     0 -           3
##  7 chr1  153544653 153544809 ENST00000368715.5|S100A4-203     0 -           3
##  8 chr1  153544957 153545063 ENST00000368715.5|S100A4-203     0 -           3
##  9 chr1  153543620 153543923 ENST00000368716.9|S100A4-204     0 -           4
## 10 chr1  153544653 153544809 ENST00000368716.9|S100A4-204     0 -           4
## 11 chr1  153545752 153545806 ENST00000368716.9|S100A4-204     0 -           4
## 12 chr1  153543612 153543939 ENST00000468373.1|S100A4-205     0 -           5
## 13 chr1  153544653 153544809 ENST00000468373.1|S100A4-205     0 -           5
## 14 chr1  153545752 153545806 ENST00000468373.1|S100A4-205     0 -           5
## 15 chr1  153543612 153543923 ENST00000354332.8|S100A4-201     0 -           1
## 16 chr1  153544653 153544809 ENST00000354332.8|S100A4-201     0 -           1
## 17 chr1  153545469 153545518 ENST00000354332.8|S100A4-201     0 -           1
## 18 chr1  153545752 153545802 ENST00000354332.8|S100A4-201     0 -           1
## 19 chr1  153543621 153543896 ESPRESSO:chr1:1668:4|NA          0 -           9
## 20 chr1  153544653 153544809 ESPRESSO:chr1:1668:4|NA          0 -           9
## Joining with `by = join_by(transcript_id)`
## Joining with `by = join_by(transcript_id)`
## Adding missing grouping variables: `transcript_type`, `transcript_name`
## # A tibble: 12 × 4
##    name                         strand     start       end
##    <chr>                        <chr>      <dbl>     <dbl>
##  1 ENST00000354332.8|S100A4-201 -      153543612 153545802
##  2 ENST00000368714.1|S100A4-202 -      153543620 153550136
##  3 ENST00000368715.5|S100A4-203 -      153543612 153545063
##  4 ENST00000368716.9|S100A4-204 -      153543620 153545806
##  5 ENST00000468373.1|S100A4-205 -      153543612 153545806
##  6 ENST00000481009.1|S100A4-206 -      153543612 153545097
##  7 ESPRESSO:chr1:1668:12|NA     -      153543621 153545057
##  8 ESPRESSO:chr1:1668:14|NA     -      153543621 153545792
##  9 ESPRESSO:chr1:1668:4|NA      -      153543621 153545791
## 10 ESPRESSO:chr1:1668:5|NA      -      153543621 153545084
## 11 ESPRESSO:chr1:1668:7|NA      -      153543621 153545791
## 12 ESPRESSO:chr1:1668:8|NA      -      153543621 153545791

# '+' strand example gene.
plot_mapping_with_m3Csites_and_trinfo(.genename = 'RPS24')
## Joining with `by = join_by(transcript_id)`
## Rows: 10567723 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): seq_name dbl (3): start, end, value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 902 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining with `by = join_by(transcript_id)`
## Rows: 5100720 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): seq_name dbl (3): start, end, value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 614 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining with `by = join_by(transcript_id)`
## Rows: 8467096 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): seq_name dbl (3): start, end, value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 938 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining with `by = join_by(transcript_id)`
## Rows: 10230564 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): seq_name dbl (3): start, end, value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 939 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining with `by = join_by(transcript_id)`
## Rows: 9915590 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): seq_name dbl (3): start, end, value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 984 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining with `by = join_by(transcript_id)`
## Rows: 8838508 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): seq_name dbl (3): start, end, value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 910 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining with `by = join_by(transcript_id)`
## Rows: 10213758 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): seq_name dbl (3): start, end, value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 948 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining with `by = join_by(transcript_id)`
## Rows: 9147872 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): seq_name dbl (3): start, end, value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 1122 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining with `by = join_by(transcript_id)`
## Rows: 10419811 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): seq_name dbl (3): start, end, value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 1023 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining with `by = join_by(transcript_id)`
## # A tibble: 20 × 7
##    chrom    start      end name                        score strand tr_num
##    <chr>    <dbl>    <dbl> <chr>                       <dbl> <chr>   <dbl>
##  1 chr10 78033885 78033904 ENST00000466129.6|RPS24-207     0 +           3
##  2 chr10 78035351 78035417 ENST00000466129.6|RPS24-207     0 +           3
##  3 chr10 78035510 78036529 ENST00000466129.6|RPS24-207     0 +           3
##  4 chr10 78033916 78033986 ENST00000482069.5|RPS24-212     0 +           6
##  5 chr10 78035351 78035417 ENST00000482069.5|RPS24-212     0 +           6
##  6 chr10 78035510 78035720 ENST00000482069.5|RPS24-212     0 +           6
##  7 chr10 78037193 78037304 ENST00000482069.5|RPS24-212     0 +           6
##  8 chr10 78040614 78040696 ENST00000482069.5|RPS24-212     0 +           6
##  9 chr10 78033862 78033904 ENST00000372360.9|RPS24-202     0 +           1
## 10 chr10 78035351 78035417 ENST00000372360.9|RPS24-202     0 +           1
## 11 chr10 78035510 78035720 ENST00000372360.9|RPS24-202     0 +           1
## 12 chr10 78037193 78037304 ENST00000372360.9|RPS24-202     0 +           1
## 13 chr10 78040203 78040225 ENST00000372360.9|RPS24-202     0 +           1
## 14 chr10 78040614 78040697 ENST00000372360.9|RPS24-202     0 +           1
## 15 chr10 78033862 78033904 ENST00000478655.6|RPS24-210     0 +           4
## 16 chr10 78035351 78035417 ENST00000478655.6|RPS24-210     0 +           4
## 17 chr10 78035510 78035720 ENST00000478655.6|RPS24-210     0 +           4
## 18 chr10 78037193 78037883 ENST00000478655.6|RPS24-210     0 +           4
## 19 chr10 78033759 78033904 ENST00000435275.5|RPS24-203     0 +           2
## 20 chr10 78035351 78035417 ENST00000435275.5|RPS24-203     0 +           2
## Joining with `by = join_by(transcript_id)`
## Joining with `by = join_by(transcript_id)`
## Adding missing grouping variables: `transcript_type`, `transcript_name`
## # A tibble: 7 × 4
##   name                        strand    start      end
##   <chr>                       <chr>     <dbl>    <dbl>
## 1 ENST00000372360.9|RPS24-202 +      78033862 78040697
## 2 ENST00000435275.5|RPS24-203 +      78033759 78040713
## 3 ENST00000466129.6|RPS24-207 +      78033885 78036529
## 4 ENST00000478655.6|RPS24-210 +      78033862 78037883
## 5 ENST00000480662.2|RPS24-211 +      78038555 78040701
## 6 ENST00000482069.5|RPS24-212 +      78033916 78040696
## 7 ENST00000613865.5|RPS24-214 +      78033759 78040716